library(tidyverse)
library(knitr)
library(plotly) ; library(viridis) ; library(gridExtra) ; library(RColorBrewer)
library(biomaRt)
library(Rtsne)
library(glmnet) ; library(ROCR) ; library(car)
library(corrplot)
library(expss) ; library(knitr)
# Map SFARI gene-score indices to their display colours.
#
# r: integer vector of indices into the palette (positions 1-6 are the
#    colours for SFARI scores 1-6; positions 7-10 are the greys used for
#    the unlabelled / 'None' categories).
# Returns the corresponding hex colour(s) / colour name(s).
#
# Fix: the original body was `pal = c(...)[r]`, which only returned the
# palette invisibly via the assignment; the selection is now the explicit
# return value and the unused local is gone.
SFARI_colour_hue = function(r) {
  c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9',
    '#b3b3b3','#808080','gray','#d9d9d9')[r]
}
In 20_04_07_LR.html we performed a logistic regression, but we discovered that the features were strongly correlated. This inflates the standard errors of the coefficients, making them no longer interpretable.
# Clusterings
# WGCNA clustering used throughout this analysis
clustering_selected = 'DynamicHybridMergedSmall'
clusterings = read_csv('./../Data/clusters.csv')
# Extract the selected clustering column as a plain, unnamed vector
# (tibble column -> data.frame -> unlist -> drop names)
clusterings$Module = clusterings[,clustering_selected] %>% data.frame %>% unlist %>% unname
# Gene ID -> module assignment lookup, used later to annotate genes
assigned_module = data.frame('ID' = clusterings$ID, 'Module' = clusterings$Module)
# Original dataset
original_dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'), row.names=1)
# Model dataset
# Loads (at least) 'dataset', 'train_set', 'test_set' and the logistic
# regression 'fit' created in 20_04_07_LR.html
load('./../Data/LR_model.RData')
# Add gene symbol
# Query Ensembl (archive frozen at Feb 2014 for reproducibility) for the
# gene symbol of every gene in 'dataset'
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org')
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=rownames(dataset), mart=mart)
Variance Inflation Factor (VIF) and correlation plot
# VIF
# Variance Inflation Factors of the logistic regression fitted in
# 20_04_07_LR.html ('fit' comes from LR_model.RData). A common rule of
# thumb flags VIF > 10 as severe multicollinearity
car::vif(fit) %>% sort
## absGS MTcor MM.00BCD8 MM.00BFC4 MM.B79F00 MM.E76BF3
## 1.120003 3.322214 5.760965 11.977660 28.704800 30.453238
## MM.619CFF MM.E58700 MM.D89000 MM.FF67A4 MM.39B600 MM.A3A500
## 40.019097 43.341694 48.009397 55.717881 74.444993 90.507830
## GS MM.00BF7D MM.8AAB00 MM.00A7FF MM.F8766D MM.00BD5F
## 96.406577 104.269895 124.383852 132.186820 185.578598 196.779668
## MM.EF7F49 MM.FF62BC MM.B983FF MM.FD61D1 MM.FE6E8A MM.C99800
## 217.997495 244.733566 267.532403 289.004766 293.298020 311.423166
## MM.9590FF MM.00B7E9 MM.D376FF MM.00BA38 MM.00B0F6 MM.00C0AF
## 345.026524 354.944339 367.784988 397.850985 423.554841 476.163259
## MM.6BB100 MM.F564E3 MM.00C097
## 494.468875 550.127905 1003.865351
# Correlation plot
# Pairwise correlations between all features (the last column, the SFARI
# label, is excluded)
corrplot.mixed(cor(train_set[,-ncol(train_set)]), lower = 'number', lower.col = 'gray', number.cex = .6, tl.pos = 'l', tl.col = '#666666')
# Clean up objects that are no longer needed
rm(getinfo, mart, clusterings)
Remove all variables with a VIF>10: We would lose all but three of our variables, not ideal
Do Principal Component Regression: We would lose the relation between the prediction and the original features, which would be interesting to study
Don’t do anything: Multicollinearity affects the coefficients and p-values of the regression, but it doesn’t affect the predictions, precision of the predictions or the goodness-of-fit statistics ref, but as with the previous option, we cannot study the coefficients of the regression
Use Ridge Regression
Using the same train and test sets created in 20_04_07_LR.html
Train model
# glmnet's binomial family expects the response as a factor
train_set$SFARI = train_set$SFARI %>% as.factor
# Feature matrix: every column except the last one (the SFARI label)
x = train_set[,-ncol(train_set)] %>% as.matrix
y = train_set$SFARI
# Grid of candidate regularisation strengths, 10^1 down to 10^-4
lambda_seq = 10^seq(1, -4, by = -.1)
# Ridge (alpha = 0) logistic regression with cross-validated lambda.
# NOTE(review): no set.seed() before cv.glmnet, so the CV folds -- and hence
# best_lambda -- are not reproducible between runs
fit = cv.glmnet(x, y, alpha = 0, lambda = lambda_seq, family = 'binomial')
best_lambda = fit$lambda.min
cat(paste0('The best model has a lambda of ', best_lambda))
## The best model has a lambda of 0.000398107170553497
# Refit on the full training set at the selected lambda
best_fit = glmnet(x, y, alpha=0, lambda=best_lambda, family='binomial')
# Note: this rm() also discards the name 'fit', previously bound to the
# logistic regression loaded from LR_model.RData
rm(x, y, lambda_seq, fit)
Predict labels in test set
# Predicted probability of being SFARI for each test-set gene.
# NOTE(review): features are selected by position (1:(ncol(train_set)-1)),
# which assumes test_set has its columns in the same order as train_set --
# selecting by colnames(train_set) would be more robust; confirm upstream
test_set$prob = predict(best_fit, newx = test_set[,1:(ncol(train_set)-1)] %>% as.matrix, s=best_lambda, type='response') %>% as.vector
# Hard labels at the 0.5 threshold
test_set$pred = test_set$prob>0.5
# Confusion matrix: expss labels + cross-tabulation of actual vs predicted
conf_mat = test_set %>% apply_labels(SFARI = 'Actual Labels',
prob = 'Assigned Probability',
pred = 'Label Prediction')
cro(conf_mat$SFARI, list(conf_mat$pred, total()))
|  Label Prediction |  #Total | |||
|---|---|---|---|---|
|  FALSE |  TRUE |  | ||
|  Actual Labels | ||||
|    FALSE | 1973 | 1054 |  | 3027 |
|    TRUE | 75 | 102 |  | 177 |
|    #Total cases | 2048 | 1156 |  | 3204 |
rm(conf_mat)
# Overall accuracy: proportion of test-set genes whose predicted label
# matches the actual SFARI label
acc = mean(test_set$SFARI==test_set$pred)
print(paste0('Accuracy = ', round(acc,4)))
## [1] "Accuracy = 0.6476"
rm(acc)
# ROCR prediction object from model scores + true labels
pred_ROCR = prediction(test_set$prob, test_set$SFARI)
# ROC curve: true-positive rate vs false-positive rate
roc_ROCR = performance(pred_ROCR, measure='tpr', x.measure='fpr')
# Area under the ROC curve (scalar, extracted from the S4 slot)
AUC = performance(pred_ROCR, measure='auc')@y.values[[1]]
plot(roc_ROCR, main=paste0('ROC curve (AUC=',round(AUC,2),')'), col='#009999')
# Chance-level diagonal for reference
abline(a=0, b=1, col='#666666')
# Lift curve: lift vs rate of positive predictions
lift_ROCR = performance(pred_ROCR, measure='lift', x.measure='rpp')
plot(lift_ROCR, main='Lift curve', col='#86b300')
rm(pred_ROCR, roc_ROCR, AUC, lift_ROCR)
There is a positive relation between the coefficient assigned to the membership of each module and the percentage of SFARI genes that are assigned to that module
MTcor has a low coefficient and Gene Significance has a negative coefficient
# Per-gene info: module-diagnosis correlation (MTcor), SFARI label and
# assigned module; the leading '#' is stripped from module colours so they
# can be matched against the ridge coefficient names below
gene_corr_info = dataset %>% mutate('ID' = rownames(dataset)) %>% dplyr::select(ID, MTcor, SFARI) %>% left_join(assigned_module, by ='ID') %>%
mutate(Module = gsub('#','',Module))
# Ridge coefficients (best_fit$beta, column s0) joined with module-level
# info; SFARI_perc is the proportion of SFARI genes in each module.
# Non-module features (GS, absGS, MTcor) get NA for MTcor/SFARI_perc
coef_info = best_fit$beta %>% as.matrix %>% data.frame %>% dplyr::rename('coef' = s0) %>% mutate('feature' = gsub('MM.','',rownames(.))) %>%
left_join(gene_corr_info, by = c('feature' = 'Module')) %>% dplyr::select(feature, coef, MTcor, SFARI) %>%
group_by(feature, coef, MTcor) %>% summarise('SFARI_perc' = mean(SFARI)) %>% arrange(desc(coef))
kable(coef_info %>% dplyr::select(feature, coef) %>% rename('Feature' = feature, 'Coefficient' = coef),
align = 'cc', caption = 'Regression Coefficients')
| Feature | Coefficient |
|---|---|
| 00C0AF | 3.0134647 |
| 00B0F6 | 2.6614085 |
| 00BA38 | 2.3979316 |
| 00A7FF | 1.5304176 |
| EF7F49 | 1.2506776 |
| D89000 | 1.1825466 |
| 6BB100 | 1.0500587 |
| 00BF7D | 0.8185542 |
| 39B600 | 0.5840582 |
| FF62BC | 0.5659054 |
| 00BCD8 | 0.2370433 |
| 9590FF | 0.2124415 |
| 00BD5F | 0.1826915 |
| 00BFC4 | 0.1129318 |
| MTcor | 0.0941246 |
| E58700 | 0.0169279 |
| C99800 | 0.0072088 |
| 619CFF | -0.0194866 |
| absGS | -0.1159901 |
| D376FF | -0.1829182 |
| B79F00 | -0.2707506 |
| F564E3 | -0.4203478 |
| GS | -0.4583694 |
| 00C097 | -0.4896129 |
| A3A500 | -0.6796896 |
| FE6E8A | -0.8069686 |
| FF67A4 | -1.0024392 |
| E76BF3 | -1.0133888 |
| F8766D | -1.1433751 |
| B983FF | -1.1490719 |
| 8AAB00 | -1.1501594 |
| FD61D1 | -1.5083219 |
| 00B7E9 | -2.6675467 |
# Coefficient vs % of SFARI genes per module; each point is coloured with
# the module's own colour (feature names are hex colours without the '#').
# Non-module features are dropped via !is.na(MTcor)
ggplotly(coef_info %>% dplyr::rename('Module' = feature) %>% filter(!is.na(MTcor)) %>%
ggplot(aes(coef, SFARI_perc)) + geom_smooth(method = 'lm', color = 'gray', alpha = 0.1) +
geom_point(aes(id = Module), color = paste0('#',coef_info$feature[!is.na(coef_info$MTcor)])) +
theme_minimal() + xlab('Coefficient') +
ylab('% of SFARI genes in Module'))
# Same x-axis, but against the module-diagnosis correlation
ggplotly(coef_info %>% dplyr::rename('Module' = feature) %>% filter(!is.na(MTcor)) %>%
ggplot(aes(coef, MTcor)) + geom_smooth(method = 'lm', color = 'gray', alpha = 0.1) +
geom_point(aes(id = Module), color = paste0('#',coef_info$feature[!is.na(coef_info$MTcor)])) +
theme_minimal() + xlab('Coefficient') +
ylab('Module-Diagnosis correlation'))
SFARI genes have a slightly higher score distribution than the rest
# Density of model scores, split by true SFARI label
plot_data = test_set %>% dplyr::select(prob, SFARI)
plot_data %>% ggplot(aes(prob, fill=SFARI, color=SFARI)) + geom_density(alpha=0.3) + xlab('Score') +
theme_minimal() + ggtitle('Model score distribution by SFARI Label')
There seems to be a positive relation between the SFARI scores and the probability assigned by the model
The number of observations when separating the test set by SFARI score is quite small, so this is not a robust result, especially for scores 1, 2 and 6
# Attach the original SFARI gene score (1-6 or 'None') to each test-set gene
plot_data = test_set %>% mutate(ID=rownames(test_set)) %>% dplyr::select(ID, prob) %>%
left_join(original_dataset %>% mutate(ID=rownames(original_dataset)), by='ID') %>%
dplyr::select(ID, prob, gene.score) %>% apply_labels(gene.score='SFARI Gene score')
# Count of test-set genes per SFARI score
cro(plot_data$gene.score)
|  #Total | |
|---|---|
|  SFARI Gene score | |
|    1 | 5 |
|    2 | 12 |
|    3 | 38 |
|    4 | 86 |
|    5 | 32 |
|    6 | 4 |
|    None | 3027 |
|    #Total cases | 3204 |
# Boxplots of model probability by SFARI gene score, coloured with the
# SFARI score palette (grey entries 8 and 7 for the unlabelled groups).
# The original chunk also built a per-score mean table (mean_vals) and
# immediately rm()'d it without ever using it; that dead computation has
# been removed -- the plot below is unchanged.
ggplotly(plot_data %>% ggplot(aes(gene.score, prob, fill=gene.score)) + geom_boxplot() +
scale_fill_manual(values=SFARI_colour_hue(r=c(1:6,8,7))) +
ggtitle('Distribution of probabilities by SFARI score') +
xlab('SFARI score') + ylab('Probability') + theme_minimal())
Genes with highest scores in test set
Considering the class imbalance in the train set, there are many SFARI scores in here
There’s not a single gene with a SFARI score of 5 or 6
# Table of the 50 test-set genes with the highest model probability,
# annotated with gene symbol, Gene Significance, module (and its
# correlation to diagnosis) and SFARI score
test_set %>% dplyr::select(prob, SFARI) %>% mutate(ID = rownames(test_set)) %>%
arrange(desc(prob)) %>% top_n(50, wt=prob) %>%
left_join(original_dataset %>% mutate(ID=rownames(original_dataset)), by='ID') %>%
left_join(gene_names, by = c('ID'='ensembl_gene_id')) %>%
dplyr::rename('GeneSymbol' = external_gene_id, 'Probability' = prob, 'ModuleDiagnosis_corr' = MTcor, 'GeneSignificance' = GS) %>%
mutate(ModuleDiagnosis_corr = round(ModuleDiagnosis_corr,4), Probability = round(Probability,4),
GeneSignificance = round(GeneSignificance,4)) %>%
dplyr::select(GeneSymbol, GeneSignificance, ModuleDiagnosis_corr, Module, Probability, gene.score) %>%
kable(caption = 'Genes with highest model probabilities from the test set')
| GeneSymbol | GeneSignificance | ModuleDiagnosis_corr | Module | Probability | gene.score |
|---|---|---|---|---|---|
| ARID1B | 0.2711 | 0.1127 | #FF62BC | 0.8711 | 1 |
| KMT2A | 0.1347 | 0.7916 | #00C097 | 0.8511 | 1 |
| TLN2 | -0.4783 | -0.9514 | #00C0AF | 0.8504 | None |
| EP400 | -0.1671 | -0.6031 | #00BA38 | 0.8419 | 3 |
| NAV1 | -0.1734 | -0.6031 | #00BA38 | 0.8409 | None |
| SCN2A | -0.5375 | -0.9514 | #00C0AF | 0.8397 | 1 |
| PDS5B | -0.4463 | -0.9514 | #00C0AF | 0.8353 | None |
| ATN1 | -0.2052 | -0.6031 | #00BA38 | 0.8351 | None |
| BAZ2A | 0.0534 | -0.0094 | #00A7FF | 0.8314 | None |
| EIF4G3 | -0.4827 | -0.8040 | #00B7E9 | 0.8312 | None |
| BICD1 | -0.0390 | 0.0586 | #FE6E8A | 0.8299 | None |
| DAAM1 | 0.1579 | -0.0094 | #00A7FF | 0.8279 | None |
| RIMBP2 | 0.2615 | -0.0094 | #00A7FF | 0.8200 | None |
| PTPRS | -0.3769 | -0.2526 | #F564E3 | 0.8199 | None |
| NFIC | -0.4414 | -0.6031 | #00BA38 | 0.8190 | None |
| DLGAP1 | -0.7790 | -0.9514 | #00C0AF | 0.8158 | 3 |
| PLXNC1 | -0.0088 | -0.0094 | #00A7FF | 0.8143 | None |
| AFF3 | -0.3217 | 0.0586 | #FE6E8A | 0.8138 | None |
| SCAF4 | 0.0185 | -0.6031 | #00BA38 | 0.8126 | None |
| USP32 | -0.6248 | -0.6031 | #00BA38 | 0.8123 | None |
| FRMPD4 | -0.6526 | -0.9514 | #00C0AF | 0.8110 | None |
| CERS6 | -0.4540 | -0.9514 | #00C0AF | 0.8106 | None |
| PPP1R12B | -0.0838 | -0.6031 | #00BA38 | 0.8099 | None |
| GRIN2A | -0.5289 | -0.9514 | #00C0AF | 0.8096 | 3 |
| WNK1 | -0.0192 | 0.2213 | #A3A500 | 0.8089 | None |
| JAZF1 | -0.3136 | -0.9514 | #00C0AF | 0.8080 | None |
| MEF2D | -0.4172 | -0.6031 | #00BA38 | 0.8067 | None |
| RAB7A | -0.4020 | -0.6031 | #00BA38 | 0.8065 | None |
| KIAA0226 | -0.3510 | -0.6031 | #00BA38 | 0.8064 | None |
| PAK2 | 0.2407 | 0.1127 | #FF62BC | 0.8048 | 3 |
| ZNF385A | -0.3231 | -0.6031 | #00BA38 | 0.8045 | None |
| MAP3K13 | -0.4321 | -0.6750 | #D376FF | 0.8032 | None |
| KMT2E | 0.0723 | 0.7916 | #00C097 | 0.8018 | 3 |
| UBAP2L | -0.3318 | -0.6031 | #00BA38 | 0.8015 | None |
| MARK4 | -0.4207 | -0.6031 | #00BA38 | 0.7993 | None |
| AFAP1 | -0.1691 | 0.1127 | #FF62BC | 0.7993 | None |
| PRPF8 | -0.5452 | -0.6031 | #00BA38 | 0.7972 | None |
| RASAL2 | -0.1627 | -0.4891 | #00BF7D | 0.7972 | None |
| TET3 | 0.4863 | 0.7287 | #39B600 | 0.7971 | None |
| KSR2 | -0.4594 | -0.6031 | #00BA38 | 0.7971 | None |
| SLC32A1 | -0.5797 | -0.6031 | #00BA38 | 0.7933 | None |
| BRD4 | 0.0563 | -0.6031 | #00BA38 | 0.7918 | 4 |
| GRIK5 | -0.3145 | -0.6031 | #00BA38 | 0.7913 | 3 |
| TMEM178B | -0.4282 | -0.9514 | #00C0AF | 0.7912 | None |
| UBLCP1 | -0.4921 | -0.9514 | #00C0AF | 0.7899 | None |
| ZNF275 | -0.0260 | -0.6031 | #00BA38 | 0.7893 | None |
| NOVA2 | -0.5467 | -0.6031 | #00BA38 | 0.7874 | None |
| SLC8A2 | -0.0972 | -0.2526 | #F564E3 | 0.7874 | None |
| UBXN7 | 0.1040 | 0.1127 | #FF62BC | 0.7866 | None |
| SEZ6L | -0.3794 | -0.9514 | #00C0AF | 0.7842 | None |
Selecting the Negative samples in the test set
# Negative samples: genes not labelled SFARI and not used for training
negative_set = dataset %>% filter(!SFARI & !rownames(.) %in% rownames(train_set)) %>% dplyr::select(-SFARI)
# dplyr::filter drops rownames, so rebuild them with the same condition
rownames(negative_set) = rownames(dataset)[!dataset$SFARI & !rownames(dataset) %in% rownames(train_set)]
# Model probability and hard label (0.5 threshold) for each negative gene
negative_set$prob = predict(best_fit, newx=negative_set %>% as.matrix, s=best_lambda, type='response') %>% as.vector
negative_set$pred = negative_set$prob>0.5
# Labelled copy for the expss cross-tabulation below
negative_set_table = negative_set %>% apply_labels(prob = 'Assigned Probability',
pred = 'Label Prediction')
cro(negative_set_table$pred)
|  #Total | |
|---|---|
|  Label Prediction | |
|    FALSE | 8102 |
|    TRUE | 4155 |
|    #Total cases | 12257 |
# Number of negative-set genes the model flags as ASD-related
cat(paste0('\n', sum(negative_set$pred), ' genes are predicted as ASD-related'))
##
## 4155 genes are predicted as ASD-related
# Probability distribution of the negative genes; the dotted line marks
# the 0.5 classification threshold
negative_set %>% ggplot(aes(prob)) + geom_density(color='#F8766D', fill='#F8766D', alpha=0.5) +
geom_vline(xintercept=0.5, color='#333333', linetype='dotted') +
ggtitle('Probability distribution of the Negative samples in the test set') +
theme_minimal()
There’s a lot of noise, but the genes with the highest probabilities have higher (absolute) Gene Significance
# Model probability vs (signed) Gene Significance, coloured by the
# module-diagnosis correlation of the gene's module
negative_set %>% ggplot(aes(prob, GS, color=MTcor)) + geom_point() + geom_smooth(method='loess', color='#666666') +
geom_hline(yintercept=0, color='gray', linetype='dashed') +
scale_color_gradientn(colours=c('#F8766D','white','#00BFC4')) +
ggtitle('Relation between Probability and Gene Significance') + theme_minimal()
# Same plot with |GS|; the dashed line marks the mean absolute GS
negative_set %>% ggplot(aes(prob, abs(GS), color=MTcor)) + geom_point() +
geom_hline(yintercept=mean(negative_set$absGS), color='gray', linetype='dashed') +
geom_smooth(method='loess', color='#666666') +
scale_color_gradientn(colours=c('#F8766D','white','#00BFC4')) +
ggtitle('Relation between Model probability and Gene Significance') + theme_minimal()
On average, the model seems to be assigning a probability inversely proportional to the Module-Diagnosis correlation of the module, with the highest positively correlated modules having the lowest average probability and the highest negatively correlated modules the highest average probability. But the difference isn’t big
# Model probability as a function of the module-diagnosis correlation;
# dashed line = mean probability over all negative genes
negative_set %>% ggplot(aes(MTcor, prob, color=GS)) + geom_point() + geom_smooth(method='loess', color='#666666') +
geom_hline(yintercept=mean(negative_set$prob), color='gray', linetype='dashed') +
scale_color_gradientn(colours=c('#F8766D','#F8766D','white','#00BFC4','#00BFC4')) +
xlab('Modules ordered by their correlation to ASD') + ylab('Model probability') +
theme_minimal()
Summarised version, plotting by module instead of by gene
The difference in the trend lines between this plot and the one above is that the one above takes all the points into consideration while this considers each module as an observation by itself, so the top one is strongly affected by big modules and the bottom one treats all modules the same
The model seems to give higher probabilities to genes belonging to modules with a small (absolute) correlation to Diagnosis, although the difference isn’t much
# Mean/sd of model probability per module, keyed by the module's MTcor.
# NOTE(review): the left_join is on MTcor, a floating-point column -- this
# relies on exact float equality between negative_set and original_dataset;
# joining on the Module colour would be more robust. Verify both columns
# originate from the same computation
plot_data = negative_set %>% group_by(MTcor) %>% summarise(mean = mean(prob), sd = sd(prob), n = n()) %>%
mutate(MTcor_sign = ifelse(MTcor>0, 'Positive', 'Negative')) %>% left_join(original_dataset, by='MTcor') %>%
dplyr::select(Module, MTcor, MTcor_sign, mean, sd, n) %>% distinct()
# Rename Module -> ID so the ggplotly tooltip (aes id) shows the module
colnames(plot_data)[1] = 'ID'
# One point per module, sized by module size, with loess and linear trends
ggplotly(plot_data %>% ggplot(aes(MTcor, mean, size=n, color=MTcor_sign)) + geom_point(aes(id=ID)) +
geom_smooth(method='loess', color='gray', se=FALSE) + geom_smooth(method='lm', se=FALSE) +
xlab('Module-Diagnosis correlation') + ylab('Mean Probability by Model') + theme_minimal())
There is a positive relation between level of expression and probability; the model seems to be indirectly capturing the level of expression of the genes to make the prediction, so it is introducing the same bias
# Gandal dataset
# Loads (at least) 'datExpr' (expression matrix) and 'DE_info'
# (differential expression results)
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame
# Per-gene mean and sd of expression across samples
mean_and_sd = data.frame(ID=rownames(datExpr), meanExpr=rowMeans(datExpr), sdExpr=apply(datExpr,1,sd))
# Join probability, expression summary and module assignment per gene
plot_data = negative_set %>% mutate(ID=rownames(negative_set)) %>% left_join(mean_and_sd, by='ID') %>%
left_join(original_dataset %>% mutate(ID=rownames(original_dataset)) %>%
dplyr::select(ID, Module), by='ID')
# Force a plain 'Module' name on the joined column (avoids join suffixes)
colnames(plot_data)[ncol(plot_data)] = 'Module'
# Gene-level: mean expression vs model probability
plot_data %>% ggplot(aes(meanExpr, prob)) + geom_point(alpha=0.2, color='#0099cc') +
geom_smooth(method='loess', color='gray', alpha=0.3) +
geom_smooth(method='lm', color='#999999', se=FALSE, alpha=1) +
theme_minimal() + ggtitle('Mean expression vs model probability by gene')
rm(mean_and_sd)
# Module-level: one point per module (sized by gene count), coloured with
# the module's own colour
plot_data2 = plot_data %>% group_by(Module) %>% summarise(meanExpr = mean(meanExpr), meanProb = mean(prob), n=n())
ggplotly(plot_data2 %>% ggplot(aes(meanExpr, meanProb, size=n)) + geom_point(color=plot_data2$Module) +
geom_smooth(method='loess', se=TRUE, color='gray', alpha=0.1, size=0.7) +
geom_smooth(method='lm', se=FALSE, color='gray') + theme_minimal() + theme(legend.position='none') +
ggtitle('Mean expression vs model probability by Module'))
rm(plot_data2)
rm(plot_data2)
There is a relation between probability and lfc, so it IS capturing a bit of true information (because lfc and mean expression were negatively correlated and it still has a positive relation in the model)
# Attach differential expression results (log2 fold change, adjusted p) to
# each negative-set gene
plot_data = negative_set %>% mutate(ID=rownames(negative_set)) %>%
left_join(DE_info %>% mutate(ID=rownames(DE_info)), by='ID')
# log2 fold change vs model probability, all genes together
plot_data %>% ggplot(aes(log2FoldChange, prob)) + geom_point(alpha=0.1, color='#0099cc') +
geom_smooth(method='loess', color='gray', alpha=0.3) +
theme_minimal() + ggtitle('lfc vs model probability by gene')
# Same plot split by differential-expression status (padj < 0.05)
plot_data %>% mutate(DE = padj<0.05) %>% ggplot(aes(log2FoldChange, prob, color=DE)) + geom_point(alpha=0.1) +
geom_smooth(method='loess', alpha=0.3) +
theme_minimal() + ggtitle('lfc vs model probability by gene')
The model is capturing the mean level of expression of the genes (indirectly through module membership), which is a strong bias found in the SFARI scores, but it seems to be capturing a bit of true biological signal as well (based on the GS and the log fold change plots)
# Persist everything downstream notebooks need to reuse this ridge model
save(train_set, test_set, negative_set, best_fit, best_lambda, dataset, file='./../Data/Ridge_model.RData')
# Record package versions for reproducibility
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] expss_0.10.2 corrplot_0.84 car_3.0-7 carData_3.0-3
## [5] ROCR_1.0-7 gplots_3.0.3 glmnet_3.0-2 Matrix_1.2-18
## [9] Rtsne_0.15 biomaRt_2.40.5 RColorBrewer_1.1-2 gridExtra_2.3
## [13] viridis_0.5.1 viridisLite_0.3.0 plotly_4.9.2 knitr_1.28
## [17] forcats_0.5.0 stringr_1.4.0 dplyr_0.8.5 purrr_0.3.3
## [21] readr_1.3.1 tidyr_1.0.2 tibble_3.0.0 ggplot2_3.3.0
## [25] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.5
## [3] Hmisc_4.4-0 lazyeval_0.2.2
## [5] splines_3.6.3 BiocParallel_1.18.1
## [7] crosstalk_1.1.0.1 GenomeInfoDb_1.20.0
## [9] digest_0.6.25 foreach_1.5.0
## [11] htmltools_0.4.0 gdata_2.18.0
## [13] fansi_0.4.1 magrittr_1.5
## [15] checkmate_2.0.0 memoise_1.1.0
## [17] cluster_2.1.0 openxlsx_4.1.4
## [19] annotate_1.62.0 modelr_0.1.6
## [21] matrixStats_0.56.0 prettyunits_1.1.1
## [23] jpeg_0.1-8.1 colorspace_1.4-1
## [25] blob_1.2.1 rvest_0.3.5
## [27] haven_2.2.0 xfun_0.12
## [29] crayon_1.3.4 RCurl_1.98-1.1
## [31] jsonlite_1.6.1 genefilter_1.66.0
## [33] survival_3.1-11 iterators_1.0.12
## [35] glue_1.3.2 gtable_0.3.0
## [37] zlibbioc_1.30.0 XVector_0.24.0
## [39] DelayedArray_0.10.0 shape_1.4.4
## [41] BiocGenerics_0.30.0 abind_1.4-5
## [43] scales_1.1.0 DBI_1.1.0
## [45] Rcpp_1.0.4 xtable_1.8-4
## [47] progress_1.2.2 htmlTable_1.13.3
## [49] foreign_0.8-75 bit_1.1-15.2
## [51] Formula_1.2-3 stats4_3.6.3
## [53] htmlwidgets_1.5.1 httr_1.4.1
## [55] acepack_1.4.1 ellipsis_0.3.0
## [57] pkgconfig_2.0.3 XML_3.99-0.3
## [59] farver_2.0.3 nnet_7.3-13
## [61] dbplyr_1.4.2 locfit_1.5-9.4
## [63] tidyselect_1.0.0 labeling_0.3
## [65] rlang_0.4.5 AnnotationDbi_1.46.1
## [67] munsell_0.5.0 cellranger_1.1.0
## [69] tools_3.6.3 cli_2.0.2
## [71] generics_0.0.2 RSQLite_2.2.0
## [73] broom_0.5.5 evaluate_0.14
## [75] yaml_2.2.1 bit64_0.9-7
## [77] fs_1.4.0 zip_2.0.4
## [79] caTools_1.18.0 nlme_3.1-144
## [81] xml2_1.2.5 compiler_3.6.3
## [83] rstudioapi_0.11 curl_4.3
## [85] png_0.1-7 reprex_0.3.0
## [87] geneplotter_1.62.0 stringi_1.4.6
## [89] highr_0.8 lattice_0.20-40
## [91] vctrs_0.2.4 pillar_1.4.3
## [93] lifecycle_0.2.0 data.table_1.12.8
## [95] bitops_1.0-6 GenomicRanges_1.36.1
## [97] R6_2.4.1 latticeExtra_0.6-29
## [99] KernSmooth_2.23-16 rio_0.5.16
## [101] IRanges_2.18.3 codetools_0.2-16
## [103] gtools_3.8.2 assertthat_0.2.1
## [105] SummarizedExperiment_1.14.1 DESeq2_1.24.0
## [107] withr_2.1.2 S4Vectors_0.22.1
## [109] GenomeInfoDbData_1.2.1 mgcv_1.8-31
## [111] parallel_3.6.3 hms_0.5.3
## [113] grid_3.6.3 rpart_4.1-15
## [115] rmarkdown_2.1 Biobase_2.44.0
## [117] lubridate_1.7.4 base64enc_0.1-3